Load library

library(Seurat)
library(Azimuth)
library(future)
library(ggplot2)
library(ggprism)

data

# Load the reference
reference <- LoadReference(path = "human_bonemarrow/")
# Load the query object for mapping
query <- LoadFileInput(path = "../04_data_objects/SeuratObject_27subjects_ncounts_nfeatures_mito_3MAD_PCA_clustering.RDS")

# Preprocess with SCTransform
query <- SCTransform(
  object = query,
  assay = "RNA",
  new.assay.name = "refAssay",
  residual.features = rownames(x = reference$map),
  reference.SCT.model = reference$map[["refAssay"]]@SCTModel.list$refmodel,
  method = 'glmGamPoi',
  ncells = 2000,
  n_genes = 2000,
  do.correct.umi = FALSE,
  do.scale = FALSE,
  do.center = TRUE
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |======================================================================| 100%
# Find anchors between query and reference
anchors <- FindTransferAnchors(
  reference = reference$map,
  query = query,
  k.filter = NA,
  reference.neighbors = "refdr.annoy.neighbors",
  reference.assay = "refAssay",
  query.assay = "refAssay",
  reference.reduction = "refDR",
  normalization.method = "SCT",
  features = intersect(rownames(x = reference$map), VariableFeatures(object = query)),
  dims = 1:50,
  n.trees = 20,
  mapping.score.k = 100
)

# Transfer cell type labels and impute protein expression
refdata <- lapply(X = c("celltype.l1", "celltype.l2"), function(x) {
  reference$map[[x, drop = TRUE]]
})
names(x = refdata) <- c("celltype.l1", "celltype.l2")
if (FALSE) {
 refdata[["impADT"]] <- GetAssayData(
    object = reference$map[['ADT']],
    slot = 'data')}

#refdata[["impADT"]] <- GetAssayData(object = reference$map[['ADT']],
                                   # slot = 'data')

query <- TransferData(
  reference = reference$map,
  query = query,
  dims = 1:50,
  anchorset = anchors,
  refdata = refdata,
  n.trees = 20,
  store.weights = TRUE
)
## Warning: Keys should be one or more alphanumeric characters followed
## by an underscore, setting key from predictionscorecelltype.l1_ to
## predictionscorecelltypel1_
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Keys should be one or more alphanumeric characters followed
## by an underscore, setting key from predictionscorecelltype.l2_ to
## predictionscorecelltypel2_
# Calculate the embeddings of the query data on the reference SPCA
query <- IntegrateEmbeddings(
  anchorset = anchors,
  reference = reference$map,
  query = query,
  reductions = "pcaproject",
  reuse.weights.matrix = TRUE
)
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from integrated_dr_ to integrateddr_
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from integrated_dr_ to integrateddr_
## Warning: All keys should be one or more alphanumeric characters followed by an
## underscore '_', setting key to integrateddr_
# Calculate the query neighbors in the reference
# with respect to the integrated embeddings
query[["query_ref.nn"]] <- FindNeighbors(
  object = Embeddings(reference$map[["refDR"]]),
  query = Embeddings(query[["integrated_dr"]]),
  return.neighbor = TRUE,
  l2.norm = TRUE
)


# The reference used in the app is downsampled compared to the reference on which
# the UMAP model was computed. This step, using the helper function NNTransform,
# corrects the Neighbors to account for the downsampling.
query <- Azimuth:::NNTransform(
  object = query,
  meta.data = reference$map[[]]
)
#saveRDS(reference, file = "Azimuth_reference_seurat_object.RDS")
# Project the query to the reference UMAP.
query[["proj.umap"]] <- RunUMAP(
  object = query[["query_ref.nn"]],
  reduction.model = reference$map[["refUMAP"]],
  reduction.key = 'UMAP_'
)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Warning in RunUMAP.default(object = neighborlist, reduction.model =
## reduction.model, : Number of neighbors between query and reference is not equal
## to the number of neighbros within reference
## Warning: No assay specified, setting assay as RNA by default.
# Calculate mapping score and add to metadata
query <- AddMetaData(
  object = query,
  metadata = MappingScore(anchors = anchors),
  col.name = "mapping.score"
)

visualization using ref assay

# First predicted metadata field, change to visualize other predicted metadata
id <- c("celltype.l1", "celltype.l2")
predicted.id <- paste0("predicted.", id)

# DimPlot of the reference
p1 <- DimPlot(object = reference$plot, reduction = "refUMAP", group.by = id[1], label = TRUE, repel = T) + NoLegend()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
p2 <- DimPlot(object = reference$plot, reduction = "refUMAP", group.by = id[2], label = TRUE, repel = T) + NoLegend()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
#p3 <- DimPlot(object = reference$plot, reduction = "refUMAP", group.by = id[3], label = TRUE, repel = T) + NoLegend()
p1 + p2 

 ggsave("Azimuth_dimplot_reference.pdf", width =16, height = 5)

# DimPlot of the query, colored by predicted cell type
pp1 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[1], label = TRUE, repel = T) + NoLegend()
pp2 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[2], label = TRUE, repel = T,label.size = 3) + NoLegend()
#pp3 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[3], label = TRUE, repel = T,label.size = 3) + NoLegend()
pp1 + pp2 

ggsave("Azimuth_dimplot_query.pdf", width =16, height = 5)

p_group <- DimPlot(object = query, reduction = "proj.umap", group.by = "group_id", label = F, repel = T)
p_id <- DimPlot(object = query, reduction = "proj.umap", group.by = "orig.ident", label = F, repel = T)
p_group + p_id

ggsave("Azimuth_dimplot_diagnosis_sample.pdf", width = 15, height = 4)

cluster_p <- DimPlot(object = query, reduction = "proj.umap", group.by = "seurat_clusters", label = TRUE, repel = T) + NoLegend()
cluster_p + pp2
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("clusters_cell_types2_annotation_dimplot.pdf", width =14, height = 7)
## Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
cluster_p + pp1
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("clusters_cell_types1_annotation_dimplot.pdf", width =14, height = 7)
## Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#cluster_p + pp3
#ggsave("clusters_cell_types3_annotation_dimplot.pdf", width =14, height = 7)

FeaturePlot(object = query, features = paste0(predicted.id, ".score"), reduction = "proj.umap",ncol = 2)

ggsave("featureplot_predicted_score.pdf", width=16, height=4)

VlnPlot(object = query, 
        features = paste0(predicted.id, ".score"), 
        group.by = predicted.id,
        pt.size = 0) +
  NoLegend()

ggsave("vlnplot_predicted_score.pdf", width=16, height=6)

# Plot the mapping score
FeaturePlot(object = query, features = "mapping.score", reduction = "proj.umap")

VlnPlot(object = query, features = "mapping.score", group.by = predicted.id) + NoLegend()

#saveRDS(query, file = "query_mapped.RDS")

visualization using RNA assay

DefaultAssay(query) <- "RNA"
pbmc <- FindVariableFeatures(query, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc), npcs = 50)
## PC_ 1 
## Positive:  SELM, C1S, PLAC9, C1R, PCOLCE, COL1A2, NNMT, SERPING1, COL6A2, NGFRAP1 
##     CCDC80, ISLR, LIMA1, PTRF, PPIC, NBL1, APP, FSTL1, SDC2, EFEMP2 
##     CALD1, MXRA8, CFI, LGALS3BP, FAM114A1, PRKCDBP, FKBP10, CAV1, COL6A1, SCARA5 
## Negative:  HLA-DRA, CD74, HLA-DRB1, C1QA, HLA-DPB1, HLA-DPA1, HLA-DMB, HLA-DQB1, ITGB2, CD83 
##     C1QC, C1QB, VSIG4, CD14, HCST, HLA-DQA1, LYZ, MNDA, ALOX5AP, PLAUR 
##     FCGR3A, RNASE1, MS4A4A, OLR1, GPR183, CPVL, IFI30, FOLR2, HLA-DRB5, CSTA 
## PC_ 2 
## Positive:  ECSCR.1, TM4SF1, PLVAP, ACKR1, GNG11, ADGRL4, NPDC1, PECAM1, RAMP2, EMCN 
##     PCAT19, VWF, CLEC14A, BCAM, ESAM, JAM2, PDLIM1, CYYR1, MMRN2, NRN1 
##     ETS2, ERG, GIMAP7, CAV2, CXorf36, NOSTRIN, AQP1, MALL, PTPRB, MYCT1 
## Negative:  HLA-DRA, FN1, CTSB, HLA-DRB1, VSIG4, HLA-DMB, HLA-DPB1, GPNMB, HLA-DPA1, CTSD 
##     C1QA, CAPG, CD74, HLA-DQA1, FTL, ITGB2, HLA-DRB5, C1QC, C1QB, HLA-DQB1 
##     CD14, PLTP, TREM1, FCGR3A, MARCO, TGFBI, MS4A4A, PCOLCE, LYZ, CD83 
## PC_ 3 
## Positive:  CD74, HLA-DRA, HLA-DRB1, HLA-DPA1, HLA-DPB1, HLA-DMB, HLA-DQA1, HLA-DQB1, HLA-DRB5, ITGB2 
##     CAPG, VSIG4, PLAUR, CPVL, CTSB, TUBA1B, CD83, CD14, MNDA, LYZ 
##     FCGR2B, IFI30, FCGR3A, C1QC, C1QA, C1QB, MARCO, CSTB, STX11, FTL 
## Negative:  DCN, FBLN1, AEBP1, SRPX, COL14A1, PODN, BICC1, PDGFRL, DPT, ECM2 
##     COL3A1, OLFML3, COL1A1, WISP2, MFAP4, CXCL12, THY1, FGF7, THBS2, EFEMP1 
##     CP, MXRA5, COL1A2, COL6A2, SFRP1, COL6A1, MMP2, ABCA8, NOVA1, ANGPTL5 
## PC_ 4 
## Positive:  ITGBL1, FN1, PRG4, ERRFI1, ITGB8, CLIC5, GABRA4, CRTAC1, SEMA5A, DIRC3 
##     MT2A, CAV1, TMEM196, AK1, SPARCL1, SOX5, NTN4, VKORC1, PPP1R1A, HTRA1 
##     DNASE1L3, HBEGF, ZNF385B, FGF10, ANK3, BTC, MTUS2, SEMA3A, VEGFC, SLC2A12 
## Negative:  DCN, SERPINF1, FBLN2, PDGFRL, GSN, FBLN1, VCAN, PODN, FBLN5, CFD 
##     MFAP4, MFAP5, OLFML3, AEBP1, THY1, MGST1, DPT, SRPX, TUBA1A, C3 
##     ECM2, FAM180B, WISP2, ASPN, MMP2, SGCE, FHL1, CNN3, FBN1, SMOC2 
## PC_ 5 
## Positive:  RNASE1, C1QA, C1QB, ALDH1A1, FTL, MARCO, FOLR2, C1QC, GPNMB, LGMN 
##     NUPR1, LILRB5, FCGR3A, TIMD4, MS4A4A, CTSD, TREM2, IFI27, CTSB, SEPP1 
##     PLTP, HPGDS, MSR1, FUCA1, CD163, GCHFR, RBP4, GPR34, VSIG4, APOC1 
## Negative:  CREM, CORO1A, IL2RG, CXCR4, CD48, JAML, CLEC10A, CD52, ICAM3, RAC2 
##     SAMSN1, RILPL2, CYTIP, GPR183, ISG20, GPAT3, CD3D, IL32, FCN1, FCER1A 
##     LGALS2, CST7, TRAC, CFP, CD1C, CD2, TUBA4A, STX11, HCST, DUSP4
pbmc <- RunUMAP(pbmc, dims = 1:50)
## 15:18:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:18:28 Read 59845 rows and found 50 numeric columns
## 15:18:28 Using Annoy for neighbor search, n_neighbors = 30
## 15:18:28 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:18:37 Writing NN index file to temp file /tmp/Rtmpq8FfHN/file36e1aa7b50e7d0
## 15:18:37 Searching Annoy index using 1 thread, search_k = 3000
## 15:19:01 Annoy recall = 100%
## 15:19:02 Commencing smooth kNN distance calibration using 1 thread
## 15:19:05 Initializing from normalized Laplacian + noise
## 15:19:09 Commencing optimization for 200 epochs, with 2650316 positive edges
## 15:19:34 Optimization finished
## Warning: Cannot add objects with duplicate keys (offending key: UMAP_), setting
## key to 'umap_'
query <- pbmc
# DimPlot of the query, colored by predicted cell type
pp1 <- DimPlot(object = query, reduction = "umap", group.by = predicted.id[1], label = TRUE, repel = T) + NoLegend()
pp2 <- DimPlot(object = query, reduction = "umap", group.by = predicted.id[2], label = TRUE, repel = T,label.size = 3) + NoLegend()
#pp3 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[3], label = TRUE, repel = T,label.size = 3) + NoLegend()
pp1 + pp2 

ggsave("Azimuth_dimplot_query_umap.pdf", width =16, height = 5)
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p_group <- DimPlot(object = query, reduction = "umap", group.by = "group_id", label = F, repel = T)
p_id <- DimPlot(object = query, reduction = "umap", group.by = "orig.ident", label = F, repel = T)
p_group + p_id

ggsave("Azimuth_dimplot_diagnosis_sample_umap.pdf", width = 15, height = 4)

cluster_p <- DimPlot(object = query, reduction = "umap", group.by = "seurat_clusters", label = TRUE, repel = T)+NoLegend()
cluster_p + pp2

ggsave("clusters_cell_types2_annotation_dimplot_umap.pdf", width =14, height = 7)


cluster_p + pp1

ggsave("clusters_cell_types1_annotation_dimplot_umap.pdf", width =14, height = 7)